From Grouped Bars to a Race Plot

Task: show data across many levels of a facet

Problems: Facet grid and facet wrap rapidly get out of their depth

Questions: Is it important to see all the data at one time?

Glance at the Data

head(dtset)
##   X_date_year X_country_residence       X_country_origin
## 1        1993        South Africa             Mozambique
## 2        1994        South Africa                 Angola
## 3        1994        South Africa Dem. Rep. of the Congo
## 4        1994        South Africa               Ethiopia
## 5        1994        South Africa                Liberia
## 6        1994        South Africa             Mozambique
##                          X_population_type X_affected
## 1 Refugees (incl. refugee-like situations)     250000
## 2 Refugees (incl. refugee-like situations)        581
## 3 Refugees (incl. refugee-like situations)        808
## 4 Refugees (incl. refugee-like situations)         66
## 5 Refugees (incl. refugee-like situations)         50
## 6 Refugees (incl. refugee-like situations)      90000
plotDT <- dtset %>%
  filter(X_country_origin != "Various/Unknown") %>%
  filter(X_population_type == "Refugees (incl. refugee-like situations)") %>%
  filter(X_date_year > 1995) %>%
  group_by(X_date_year) %>%
  mutate(rank = min_rank(-X_affected) * 1) %>%
  ungroup() %>%
  filter(rank <= 10)

tail(plotDT)
## # A tibble: 6 x 6
##   X_date_year X_country_resid… X_country_origin X_population_ty… X_affected
##         <int> <fct>            <fct>            <fct>                 <int>
## 1        2017 South Africa     Eritrea          Refugees (incl.…       1924
## 2        2017 South Africa     Ethiopia         Refugees (incl.…      17562
## 3        2017 South Africa     Rwanda           Refugees (incl.…       1295
## 4        2017 South Africa     Somalia          Refugees (incl.…      26977
## 5        2017 South Africa     Uganda           Refugees (incl.…        549
## 6        2017 South Africa     Zimbabwe         Refugees (incl.…       4558
## # … with 1 more variable: rank <dbl>
plotDT$X_country_origin <- ifelse(plotDT$X_country_origin == "Serbia and Kosovo (S/RES/1244 (1999))", "Serbia/Kosovo", as.character(plotDT$X_country_origin))

plotDT[plotDT$X_affected == max(plotDT$X_affected),]
## # A tibble: 1 x 6
##   X_date_year X_country_resid… X_country_origin X_population_ty… X_affected
##         <int> <fct>            <chr>            <fct>                 <int>
## 1        2015 South Africa     Somalia          Refugees (incl.…      41458
## # … with 1 more variable: rank <dbl>
plotDT$state_time <- ifelse(plotDT$X_date_year == max(plotDT$X_date_year), 5, 1)

Raw plot, neither faceted nor animated

Here the x axis is date, with bars grouped by country

baseplot1 <- ggplot(plotDT, 
    # Set aesthetics
    aes(X_date_year, 
        group = X_country_origin, 
        fill = as.factor(X_country_origin), 
        color = as.factor(X_country_origin)))+
    theme_bw()+
    theme(legend.position = "bottom")+
    geom_bar(aes(y = X_affected), stat = "identity", position = "dodge")

baseplot1

Flip axis directions

baseplot2 <- ggplot(plotDT, 
    # Set aesthetics
    aes(X_date_year, 
        group = X_country_origin, 
        fill = as.factor(X_country_origin), 
        color = as.factor(X_country_origin)))+
    theme_bw()+
    theme(legend.position = "bottom")+
    coord_flip(clip = "off", expand = FALSE, ylim = c(0, 50000)) +
    geom_bar(aes(y = X_affected), stat = "identity", position = "dodge")

baseplot2

Add descriptive labels

baseplot2 <- ggplot(plotDT, 
    # Set aesthetics
    aes(X_date_year, 
        group = X_country_origin, 
        fill = as.factor(X_country_origin), 
        color = as.factor(X_country_origin)))+
    theme_bw()+
    theme(legend.position = "bottom")+
        geom_text(aes(y = 0, label = paste(X_country_origin, " ")), vjust = 0.2, hjust = 1) +
    geom_text(aes(y = X_affected, 
        label = as.character(X_affected)), 
        color = "black", vjust = 0.2, hjust = -.5)+
    coord_flip(clip = "off", expand = FALSE, ylim = c(0, 50000)) +
    geom_bar(aes(y = X_affected), stat = "identity", position = "dodge")

baseplot2

Stop the bar grouping (dodge), switch axis to country from year

baseplot2 <- ggplot(plotDT, 
    # Set aesthetics
    aes(x=rank, 
        group = X_country_origin, 
        fill = as.factor(X_country_origin), 
        color = as.factor(X_country_origin)))+
    theme_bw()+
    theme(legend.position = "bottom")+
    geom_text(aes(y = 0, label = paste(X_country_origin, " ")), vjust = 0.2, hjust = 1) +
    geom_text(aes(y = X_affected, 
        label = as.character(X_affected)), 
        color = "black", vjust = 0.2, hjust = -.5)+
    coord_flip(clip = "off", expand = FALSE, ylim = c(0, 50000)) +
    geom_bar(aes(y = X_affected), stat = "identity", position = "identity")

baseplot2

Adjust margins, fix axis text, drop legend

baseplot2 <- ggplot(plotDT, 
    # Set aesthetics
    aes(x=rank, 
        group = X_country_origin, 
        fill = as.factor(X_country_origin), 
        color = as.factor(X_country_origin)))+
    theme_bw()+
    theme(legend.position = "none",
        axis.ticks.y = element_blank(),  # These relate to the axes post-flip
        axis.text.y  = element_blank(),  # These relate to the axes post-flip
        axis.title.y = element_blank(),
        plot.margin = margin(1,1,1,5, "cm"))+
    geom_text(aes(y = 0, label = paste(X_country_origin, " ")), vjust = 0.2, hjust = 1) +
    geom_text(aes(y = X_affected, 
        label = as.character(X_affected)), 
        color = "black", vjust = 0.2, hjust = -.5)+
    coord_flip(clip = "off", expand = FALSE, ylim = c(0, 50000)) +
    geom_bar(aes(y = X_affected), stat = "identity", position = "identity")

baseplot2

Prettify Y, reverse X direction

plotDT2 <- plotDT[plotDT$X_date_year == 2002,]
baseplot <- ggplot(plotDT, 
    # Set aesthetics
    aes(x = rank,
        group = X_country_origin, 
        fill = as.factor(X_country_origin), 
        color = as.factor(X_country_origin)))+
    theme_bw()+
    theme(legend.position = "none",
        axis.ticks.y = element_blank(),  # These relate to the axes post-flip
        axis.text.y  = element_blank(),  # These relate to the axes post-flip
        axis.title.y = element_blank(),
        plot.margin = margin(1,1,1,5, "cm"))+
    geom_bar(aes(y = X_affected), stat = "identity", position = "identity")+
    geom_text(aes(y = 0, label = paste(X_country_origin, " ")), vjust = 0.2, hjust = 1) +
    geom_text(aes(y = X_affected, 
        label = as.character(X_affected)), 
        color = "black", vjust = 0.2, hjust = -.5)+
    scale_y_continuous(labels = scales::comma) +
    scale_x_reverse() +
    coord_flip(clip = "off", expand = FALSE, ylim = c(0, 50000)) 
        
baseplot

Ways to Render This

We need to present the data on an annual basis in a way that tells us:

  1. What locations refugees come from

These may be places with high instability or political turmoil at a given time

  1. How the top locations within a year compare to the other top 10 locations

Add Facets

Not good.

baseplot+facet_wrap(X_date_year~.)

baseplot+facet_grid(X_date_year~.)

Animate

animp <- baseplot+
    labs(title = "Refugees Residing in South Africa by Origin, {closest_state}", y="Affected Persons")+
    transition_states(
        X_date_year,
        transition_length = 4, 
        state_length = c(rep(1, 21), 60))+
  ease_aes('cubic-in') 

animate(animp, fps = 20, duration = 20, width = 800, height = 600)